suppressPackageStartupMessages(library(tidyverse))
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
Settings
# Project locations. `wd` keeps its trailing slash because the output
# directories below are built from it with paste0().
data_dir <- "/Volumes/Mitsu_NGS_3/METTL2A/"
wd <- "~/Google Drive/My Drive/Analysis/METTL2A/"
figdir <- paste0(wd, "Figures/DRS_seqcontent/")
tabledir <- paste0(wd, "Tables/DRS/Seq_content/")
setwd(wd)

# Default ggplot2 theme for the plots created after this point: classic theme
# at base size 7 with the legend placed below the panel.
theme_set(theme_classic(base_size = 7) + theme(legend.position = "bottom"))
Functions
#' Convert per-transcript pattern counts into percentages
#'
#' Every column other than `transcript_id`, `seq_length` and `seq` is treated
#' as a count column named `<pattern>_num`. Each count is divided by the number
#' of windows of that pattern's length that fit into the sequence,
#' `seq_length - str_length(pattern) + 1`, and multiplied by 100.
#'
#' @param df A data frame with columns `transcript_id`, `seq`, `seq_length`
#'   and one or more `<pattern>_num` count columns.
#' @return A tibble with `transcript_id`, `seq_length` and one
#'   `<pattern>_percent` column per input count column. `seq` and the raw
#'   counts are not returned.
calc_base_percentage <- function(df) {
  # One row per transcript/pattern pair; "<pattern>_num" -> pattern.
  counts_long <- pivot_longer(
    select(df, -seq),
    cols = -c(transcript_id, seq_length),
    names_to = "pattern",
    names_pattern = "(.*)_num",
    values_to = "num"
  )

  # Denominator = number of possible start positions for the pattern:
  #   seq_length     for a single base (A, C, G, T)
  #   seq_length - 1 for a dinucleotide (e.g. CC)
  percent_long <- mutate(
    counts_long,
    percent = 100 * num / (seq_length - str_length(pattern) + 1)
  )

  # Back to one row per transcript, one "<pattern>_percent" column per pattern.
  pivot_wider(
    percent_long,
    id_cols = c(transcript_id, seq_length),
    names_from = pattern,
    values_from = percent,
    names_glue = "{pattern}_percent"
  )
}
Read data
# Import the Espresso / DESeq2 transcript table stored under `wd`.
espresso_quantification <- read_tsv(
  paste0(wd, "Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-18.tsv")
)
## Rows: 36717 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
espresso_quantification
## # A tibble: 36,717 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Transcripts with at least one significant position whose reference k-mer
# matches "..C.." (a C with two or more characters on each side; presumably
# the centre base of a 5-mer -- TODO confirm the k-mer length).
# Result: one row per transcript_id, flagged is_methylated = TRUE.
m3C_transcripts <-
  read_tsv(
    paste0(wd, "Tables/DRS/Positions/common_sig_seqs_in_intensity_up_2024-04-22.tsv.gz")
  ) |>
  filter(grepl("..C..", ref_kmer)) |>
  distinct(transcript_id) |>
  mutate(is_methylated = TRUE)
## Rows: 605 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): transcript_id, transcript_name, ref_kmer, GMM_cov_type_G, cluster_...
## dbl (35): position, GMM_logit_pvalue_G, KS_dwell_pvalue_G, KS_intensity_pval...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
m3C_transcripts
## # A tibble: 71 × 2
## transcript_id is_methylated
## <chr> <lgl>
## 1 ENST00000429711.7 TRUE
## 2 ENST00000647248.2 TRUE
## 3 ENST00000389680.2 TRUE
## 4 ENST00000361390.2 TRUE
## 5 ENST00000361453.3 TRUE
## 6 ENST00000387347.2 TRUE
## 7 ENST00000361624.2 TRUE
## 8 ENST00000361739.1 TRUE
## 9 ENST00000361899.2 TRUE
## 10 ENST00000361227.2 TRUE
## # ℹ 61 more rows
# Every transcript listed in the "intensity up" position table, regardless of
# the reference k-mer (no ref_kmer filter is applied here).
# Result: one row per transcript_id, flagged is_intensityup = TRUE.
intensityup_transcripts <-
  read_tsv(
    paste0(wd, "Tables/DRS/Positions/common_sig_seqs_in_intensity_up_2024-04-22.tsv.gz")
  ) |>
  distinct(transcript_id) |>
  mutate(is_intensityup = TRUE)
## Rows: 605 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): transcript_id, transcript_name, ref_kmer, GMM_cov_type_G, cluster_...
## dbl (35): position, GMM_logit_pvalue_G, KS_dwell_pvalue_G, KS_intensity_pval...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
intensityup_transcripts
## # A tibble: 85 × 2
## transcript_id is_intensityup
## <chr> <lgl>
## 1 ENST00000429711.7 TRUE
## 2 ENST00000647248.2 TRUE
## 3 ENST00000389680.2 TRUE
## 4 ENST00000361390.2 TRUE
## 5 ENST00000361453.3 TRUE
## 6 ENST00000387347.2 TRUE
## 7 ENST00000361624.2 TRUE
## 8 ENST00000361739.1 TRUE
## 9 ENST00000361899.2 TRUE
## 10 ENST00000361227.2 TRUE
## # ℹ 75 more rows
# Transcript sequences. Column names are supplied here, so the first line of
# the file is read as data rather than as a header.
transcript_seqs <- read_tsv(
  file = "/Volumes/Mitsu_NGS_2/METTL2A/Database/Custom/Espresso_AsPC1/Espresso_AsPC1.transcripts.tsv",
  col_names = c("transcript_id", "seq", "seq_length")
)
## Rows: 36717 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): transcript_id, seq
## dbl (1): seq_length
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
transcript_seqs
## # A tibble: 36,717 × 3
## transcript_id seq seq_length
## <chr> <chr> <dbl>
## 1 ENST00000339437.11 AGCCCGGAAGTGCGCGTGGCGGCGGTGGCGGCTGCGGCAACAGCGG… 987
## 2 ENST00000251607.11 AGCCCGGAAGTGCGCGTGGCGGCGGTGGCGGCTGCGGCAACAGCGG… 2252
## 3 ENST00000420393.5 CAGCGGGGCCGGTAAGCGGGCGCGCGCCGCTCAGAGGGGCAGAGTT… 854
## 4 ENST00000698415.1 GATGTATGATGAGTTTAGTTGAATGCTCGTGTTGCTGTCTGCTAGC… 6597
## 5 ENST00000698416.1 CATGACTAGTTTTGTGGGTAGCAATGATGTTTAAATGTCACACACT… 5500
## 6 ENST00000488263.5 AGGAACTTCATCATGAAGTCTCAAGTAAACGAACATTTTATCTTTC… 4528
## 7 ENST00000424814.5 GAGATCAGCAGGACGCTGCGCACAACATGGGCAACCACCTGCCGCT… 2038
## 8 ENST00000231948.9 AGACATGGCCGGCGAAGGAGATCAGCAGGACGCTGCGCACAACATG… 2187
## 9 ENST00000432408.6 GCCTCCTTTGCGGGTAAACAGACATGGCCGGCGAAGGAGATCAGCA… 2203
## 10 ENST00000459840.5 ATGGAGGCATTTAAACTGGGACTGAGATGGGACTGAGTGATTAAAT… 723
## # ℹ 36,707 more rows
Calculate the number of each base (A, C, G, T) and of each C-initiated dinucleotide (CC, CA, CG, CT) in each transcript
# Count single bases and C-initiated dinucleotides in every transcript
# sequence. Percentages are derived afterwards by calc_base_percentage(),
# which divides each count by the number of possible start positions
# (seq_length for single bases, seq_length - 1 for dinucleotides).
num_CC <-
  transcript_seqs |>
  mutate(
    A_num = str_count(seq, "A"),
    C_num = str_count(seq, "C"),
    G_num = str_count(seq, "G"),
    T_num = str_count(seq, "T"),
    # str_count() counts non-overlapping matches, so a plain "CC" pattern
    # under-counts inside C runs ("CCC" -> 1 instead of 2). The zero-width
    # lookahead counts every start position of a CC dinucleotide, which is
    # what the (seq_length - 1) denominator assumes.
    CC_num = str_count(seq, "(?=CC)"),
    # CA, CG and CT cannot overlap with themselves, so a plain pattern
    # already counts every occurrence.
    CA_num = str_count(seq, "CA"),
    CG_num = str_count(seq, "CG"),
    CT_num = str_count(seq, "CT")
  )
num_CC
## # A tibble: 36,717 × 11
## transcript_id seq seq_length A_num C_num G_num T_num CC_num CA_num CG_num
## <chr> <chr> <dbl> <int> <int> <int> <int> <int> <int> <int>
## 1 ENST0000033943… AGCC… 987 283 182 224 298 35 60 17
## 2 ENST0000025160… AGCC… 2252 741 353 485 673 60 131 23
## 3 ENST0000042039… CAGC… 854 192 215 244 203 53 53 45
## 4 ENST0000069841… GATG… 6597 2120 1105 1260 2112 179 447 34
## 5 ENST0000069841… CATG… 5500 1818 893 999 1790 143 369 26
## 6 ENST0000048826… AGGA… 4528 1426 736 885 1481 126 271 26
## 7 ENST0000042481… GAGA… 2038 691 368 395 584 73 138 21
## 8 ENST0000023194… AGAC… 2187 726 400 435 626 80 147 25
## 9 ENST0000043240… GCCT… 2203 728 405 439 631 82 147 26
## 10 ENST0000045984… ATGG… 723 230 132 150 211 26 42 9
## # ℹ 36,707 more rows
## # ℹ 1 more variable: CT_num <int>
# Turn the raw counts into per-transcript percentages and show a preview.
percent_bases <- calc_base_percentage(num_CC)
percent_bases
## # A tibble: 36,717 × 10
## transcript_id seq_length A_percent C_percent G_percent T_percent CC_percent
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ENST0000033943… 987 28.7 18.4 22.7 30.2 3.55
## 2 ENST0000025160… 2252 32.9 15.7 21.5 29.9 2.67
## 3 ENST0000042039… 854 22.5 25.2 28.6 23.8 6.21
## 4 ENST0000069841… 6597 32.1 16.8 19.1 32.0 2.71
## 5 ENST0000069841… 5500 33.1 16.2 18.2 32.5 2.60
## 6 ENST0000048826… 4528 31.5 16.3 19.5 32.7 2.78
## 7 ENST0000042481… 2038 33.9 18.1 19.4 28.7 3.58
## 8 ENST0000023194… 2187 33.2 18.3 19.9 28.6 3.66
## 9 ENST0000043240… 2203 33.0 18.4 19.9 28.6 3.72
## 10 ENST0000045984… 723 31.8 18.3 20.7 29.2 3.60
## # ℹ 36,707 more rows
## # ℹ 3 more variables: CA_percent <dbl>, CG_percent <dbl>, CT_percent <dbl>
Join information
# Annotate the quantification table with sequence composition and with the
# methylation / intensity-up flags.
# The join key is stated explicitly so that a column which later happens to
# be shared between two tables cannot silently become part of the key.
espresso_quantification_numCC <-
  espresso_quantification |>
  left_join(percent_bases, by = "transcript_id") |>
  left_join(m3C_transcripts, by = "transcript_id") |>
  left_join(intensityup_transcripts, by = "transcript_id") |>
  # Transcripts absent from the flag tables get NA from the left joins;
  # treat them as "not flagged".
  replace_na(list(is_methylated = FALSE, is_intensityup = FALSE))
## Joining with `by = join_by(transcript_id)`
## Joining with `by = join_by(transcript_id)`
## Joining with `by = join_by(transcript_id)`
espresso_quantification_numCC
## # A tibble: 36,717 × 40
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 34 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
colnames(espresso_quantification_numCC)
## [1] "transcript_id" "transcript_type"
## [3] "transcript_name" "gene_id"
## [5] "gene_type" "gene_name"
## [7] "siMETTL2A_baseMean" "siMETTL2A_log2FoldChange"
## [9] "siMETTL2A_lfcSE" "siMETTL2A_stat"
## [11] "siMETTL2A_pvalue" "siMETTL2A_padj"
## [13] "siMETTL2A_I_baseMean" "siMETTL2A_I_log2FoldChange"
## [15] "siMETTL2A_I_lfcSE" "siMETTL2A_I_stat"
## [17] "siMETTL2A_I_pvalue" "siMETTL2A_I_padj"
## [19] "siMETTL2A_G_baseMean" "siMETTL2A_G_log2FoldChange"
## [21] "siMETTL2A_G_lfcSE" "siMETTL2A_G_stat"
## [23] "siMETTL2A_G_pvalue" "siMETTL2A_G_padj"
## [25] "seqname" "genetype2"
## [27] "isUp" "isDown"
## [29] "common_DETs" "seq_length"
## [31] "A_percent" "C_percent"
## [33] "G_percent" "T_percent"
## [35] "CC_percent" "CA_percent"
## [37] "CG_percent" "CT_percent"
## [39] "is_methylated" "is_intensityup"
# Write the annotated table into `tabledir` with the project helper
# export_tsv() (loaded from myUtilities).
# NOTE(review): the recorded output shows a file named after the piped object
# plus a date, so renaming the object presumably renames the file -- confirm
# against the helper's implementation.
espresso_quantification_numCC |>
export_tsv(outdir = tabledir)
##
## Exported to: ~/Google Drive/My Drive/Analysis/METTL2A/Tables/DRS/Seq_content/espresso_quantification_numCC_2024-07-29.tsv
## # A tibble: 36,717 × 40
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 34 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
Plot
Sina plot (methylation)
C
# Sina + box plot of C content (%) split by methylation flag, TRUE listed
# first, drawn horizontally.
percent_C_methylation_sina <-
  ggplot(
    espresso_quantification_numCC,
    aes(
      x = factor(is_methylated, levels = c("TRUE", "FALSE")),
      y = C_percent
    )
  ) +
  ggforce::geom_sina(colour = "gray", size = 0.2) +
  # coef = Inf stretches the whiskers to the extremes (no outlier points).
  geom_boxplot(width = 0.1, coef = Inf, lwd = 0.2) +
  coord_flip()
ggsave_multiple_formats(
  percent_C_methylation_sina,
  width = 5, height = 3, outdir = figdir, units = "cm"
)

# Wilcoxon test of C content between the two methylation groups.
rstatix::wilcox_test(espresso_quantification_numCC, C_percent ~ is_methylated)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 C_percent FALSE TRUE 36646 71 1077184. 0.0122
CC
# Sina + box plot of CC dinucleotide content (%) split by methylation flag,
# TRUE listed first, drawn horizontally.
percent_CC_methylation_sina <-
  ggplot(
    espresso_quantification_numCC,
    aes(
      x = factor(is_methylated, levels = c("TRUE", "FALSE")),
      y = CC_percent
    )
  ) +
  ggforce::geom_sina(colour = "gray", size = 0.2) +
  # coef = Inf stretches the whiskers to the extremes (no outlier points).
  geom_boxplot(width = 0.1, coef = Inf, lwd = 0.2) +
  coord_flip()
ggsave_multiple_formats(
  percent_CC_methylation_sina,
  width = 5, height = 3, outdir = figdir, units = "cm"
)

# Wilcoxon test of CC content between the two methylation groups.
rstatix::wilcox_test(espresso_quantification_numCC, CC_percent ~ is_methylated)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 CC_percent FALSE TRUE 36646 71 1079413 0.013
ECDF (methylation)
C
# Empirical CDF of C content (%), one curve per methylation flag.
# The two manual colours are unnamed, so they are assigned in scale order
# (gray for FALSE, green for TRUE).
percent_C_methylation_ecdf <-
  ggplot(
    espresso_quantification_numCC,
    aes(colour = is_methylated, x = C_percent)
  ) +
  stat_ecdf() +
  scale_color_manual(values = c("gray", "#1A8F3B"))
ggsave_multiple_formats(
  percent_C_methylation_ecdf,
  width = 4, height = 3.5, outdir = figdir, units = "cm"
)

CC
# Empirical CDF of CC dinucleotide content (%), one curve per methylation
# flag. The two manual colours are unnamed, so they are assigned in scale
# order (gray for FALSE, green for TRUE).
percent_CC_methylation_ecdf <-
  ggplot(
    espresso_quantification_numCC,
    aes(colour = is_methylated, x = CC_percent)
  ) +
  stat_ecdf() +
  scale_color_manual(values = c("gray", "#1A8F3B"))
ggsave_multiple_formats(
  percent_CC_methylation_ecdf,
  width = 4, height = 3.5, outdir = figdir, units = "cm"
)

# Wilcoxon test of CC content between the two methylation groups (the same
# comparison that accompanies the CC sina plot).
rstatix::wilcox_test(espresso_quantification_numCC, CC_percent ~ is_methylated)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 CC_percent FALSE TRUE 36646 71 1079413 0.013
CA
# Empirical CDF of CA dinucleotide content (%), one curve per methylation
# flag. The plot is only displayed: it is neither assigned to a variable nor
# saved (an earlier assignment was left commented out).
ggplot(
  espresso_quantification_numCC,
  aes(colour = is_methylated, x = CA_percent)
) +
  stat_ecdf() +
  scale_color_manual(values = c("gray", "#1A8F3B"))

# Wilcoxon test of CA content between the two methylation groups.
rstatix::wilcox_test(espresso_quantification_numCC, CA_percent ~ is_methylated)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 CA_percent FALSE TRUE 36646 71 1186151 0.198
Violin (common DETs + methylation)
CC, common DETs + methylation
# Violin + box plot of CC content (%) per common_DETs class, one facet per
# methylation flag. The three unnamed fill colours are assigned in scale
# order; the recorded test output lists the classes as down, other, up.
ggplot(
  espresso_quantification_numCC,
  aes(x = common_DETs, y = CC_percent, fill = common_DETs)
) +
  geom_violin() +
  # White boxes on top of the violins; outlier points are hidden.
  geom_boxplot(width = 0.1, outlier.colour = NA, fill = "white") +
  scale_fill_manual(values = c("blue", "gray", "red")) +
  facet_wrap(~ is_methylated)
## Warning: Groups with fewer than two data points have been dropped.

# Pairwise Wilcoxon tests of CC content between common_DETs classes, run
# separately within each methylation group.
rstatix::wilcox_test(
  group_by(espresso_quantification_numCC, is_methylated),
  CC_percent ~ common_DETs
)
## # A tibble: 6 × 10
## is_methylated .y. group1 group2 n1 n2 statistic p p.adj
## * <lgl> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 FALSE CC_percent down other 539 35699 10633184 2.68e-5 5.36e-5
## 2 FALSE CC_percent down up 539 408 132024. 1.19e-7 3.57e-7
## 3 FALSE CC_percent other up 35699 408 8104528 8.63e-5 8.63e-5
## 4 TRUE CC_percent down other 1 40 12 6.34e-1 1 e+0
## 5 TRUE CC_percent down up 1 30 8 5.81e-1 1 e+0
## 6 TRUE CC_percent other up 40 30 541 4.9 e-1 1 e+0
## # ℹ 1 more variable: p.adj.signif <chr>
# Violin + box plot of CC content (%) per methylation flag, one facet per
# common_DETs class (fill colours follow the class, assigned in scale order).
ggplot(
  espresso_quantification_numCC,
  aes(x = is_methylated, y = CC_percent, fill = common_DETs)
) +
  geom_violin() +
  # White boxes on top of the violins; outlier points are hidden.
  geom_boxplot(width = 0.1, outlier.colour = NA, fill = "white") +
  scale_fill_manual(values = c("blue", "gray", "red")) +
  facet_wrap(~ common_DETs)
## Warning: Groups with fewer than two data points have been dropped.

# Wilcoxon test of CC content between methylation groups, run separately
# within each common_DETs class.
rstatix::wilcox_test(
  group_by(espresso_quantification_numCC, common_DETs),
  CC_percent ~ is_methylated
)
## # A tibble: 3 × 8
## common_DETs .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 down CC_percent FALSE TRUE 539 1 341 0.649
## 2 other CC_percent FALSE TRUE 35699 40 606732. 0.1
## 3 up CC_percent FALSE TRUE 408 30 4181 0.00377
ECDF (intensity up)
C
# Empirical CDF of C content (%), one curve per intensity-up flag.
# The two manual colours are unnamed, so they are assigned in scale order
# (gray for FALSE, green for TRUE).
percent_C_intensityup_ecdf <-
  ggplot(
    espresso_quantification_numCC,
    aes(colour = is_intensityup, x = C_percent)
  ) +
  stat_ecdf() +
  scale_color_manual(values = c("gray", "#1A8F3B"))
ggsave_multiple_formats(
  percent_C_intensityup_ecdf,
  width = 4, height = 3.5, outdir = figdir, units = "cm"
)

# Wilcoxon test of C content between the two intensity-up groups.
rstatix::wilcox_test(espresso_quantification_numCC, C_percent ~ is_intensityup)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 C_percent FALSE TRUE 36632 85 1327510. 0.0188
CC
# Empirical CDF of CC dinucleotide content (%), one curve per intensity-up
# flag. The two manual colours are unnamed, so they are assigned in scale
# order (gray for FALSE, green for TRUE).
percent_CC_intensityup_ecdf <-
  ggplot(
    espresso_quantification_numCC,
    aes(colour = is_intensityup, x = CC_percent)
  ) +
  stat_ecdf() +
  scale_color_manual(values = c("gray", "#1A8F3B"))
ggsave_multiple_formats(
  percent_CC_intensityup_ecdf,
  width = 4, height = 3.5, outdir = figdir, units = "cm"
)

# Wilcoxon test of CC content between the two intensity-up groups.
rstatix::wilcox_test(espresso_quantification_numCC, CC_percent ~ is_intensityup)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 CC_percent FALSE TRUE 36632 85 1330268. 0.0203
Violin (common DETs)
CC, common DETs
# Violin + box plot of CC content (%) per common_DETs class. The three
# unnamed fill colours are assigned in scale order; the recorded test output
# lists the classes as down, other, up.
ggplot(
  espresso_quantification_numCC,
  aes(x = common_DETs, y = CC_percent, fill = common_DETs)
) +
  geom_violin() +
  # coef = Inf stretches the whiskers to the extremes (no outlier points).
  geom_boxplot(width = 0.1, coef = Inf, fill = "white") +
  scale_fill_manual(values = c("blue", "gray", "red"))

# Pairwise Wilcoxon tests of CC content between common_DETs classes.
rstatix::wilcox_test(espresso_quantification_numCC, CC_percent ~ common_DETs)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 CC_percent down other 540 35739 10659050 0.0000292 5.84e-5 ****
## 2 CC_percent down up 540 438 139620. 0.00000116 3.48e-6 ****
## 3 CC_percent other up 35739 438 8539248. 0.001 1 e-3 **
C, common DETs
# Violin + box plot of C content (%) per common_DETs class. The three
# unnamed fill colours are assigned in scale order; the recorded test output
# lists the classes as down, other, up.
ggplot(
  espresso_quantification_numCC,
  aes(x = common_DETs, y = C_percent, fill = common_DETs)
) +
  geom_violin() +
  # coef = Inf stretches the whiskers to the extremes (no outlier points).
  geom_boxplot(width = 0.1, coef = Inf, fill = "white") +
  scale_fill_manual(values = c("blue", "gray", "red"))

# Pairwise Wilcoxon tests of C content between common_DETs classes.
rstatix::wilcox_test(espresso_quantification_numCC, C_percent ~ common_DETs)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 C_percent down other 540 35739 10616316. 0.0000627 1.25e-4 ***
## 2 C_percent down up 540 438 138110. 0.00000622 1.87e-5 ****
## 3 C_percent other up 35739 438 8477800 0.003 3 e-3 **